﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using LDA;

namespace LDAGibbsSampling
{
    /// <summary>
    /// Console driver: loads a vocabulary file and a set of word-index documents,
    /// runs the LDA Gibbs sampler over them, then prints the top words of each topic
    /// and the per-document topic weights.
    /// </summary>
    class Program
    {
        /// <summary>Number of topics; passed to the sampler and used to bound the topic loops.</summary>
        private const int K = 2;

        /// <summary>Upper bound on the number of words printed for each topic.</summary>
        private const int TopWordsPerTopic = 10;

        /// <summary>
        /// Program entry point. Reads the input files, runs the sampler and writes the
        /// results to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // inFiles[0] is the vocabulary; every following entry is one document.
            string[] inFiles = { 
                "..\\..\\..\\..\\data\\vocab.txt", 
                "..\\..\\..\\..\\data\\cs.index.txt", 
                //"..\\..\\..\\..\\data\\computer.index.txt",
                "..\\..\\..\\..\\data\\infotheory.index.txt", 
                "..\\..\\..\\..\\data\\InfoRet.index.txt",
                "..\\..\\..\\..\\data\\datamining.index.txt", 
                "..\\..\\..\\..\\data\\cryptography.index.txt", 
                "..\\..\\..\\..\\data\\cryptanalysis.index.txt",
                "..\\..\\..\\..\\data\\historycrypto.index.txt",
                "..\\..\\..\\..\\data\\database.index.txt"
            };

            string[] vocabArray = ReadVocabulary(inFiles[0]);

            // Documents of variable length
            int[][] docs = new int[inFiles.Length - 1][];
            for (int i = 1; i < inFiles.Length; i++)
            {
                docs[i - 1] = ReadDocument(inFiles[i]);
            }

            // NOTE(review): the third argument is presumably the topic count (it was the
            // literal 2, equal to K) - confirm against the LDAGibbs constructor.
            LDAGibbs lda = new LDAGibbs(1, 1, K, docs, vocabArray, 1000);
            lda.MCMC();

            //calculate parameters
            // Phi is not used below; the read is kept in case the getter performs work
            // that the other properties rely on - TODO confirm and remove if not.
            double[,] phi = lda.Phi;
            double[,] theta = lda.Theta;
            Result[][] topicWords = lda.TopicWords;

            for (int k = 0; k < K; k++)
            {
                Console.WriteLine("Topic {0}", k);
                Result[] words = topicWords[k];
                // Clamp so a topic with fewer entries than the cap cannot overrun the array.
                int shown = Math.Min(TopWordsPerTopic, words.Length);
                for (int v = 0; v < shown; v++)
                {
                    Console.WriteLine("{0} & {1} & {2}", v + 1, words[v].Word, words[v].Prob);
                }
                Console.WriteLine();
            }

            for (int m = 0; m < docs.Length; m++)
            {
                // docs[m] was read from inFiles[m + 1] (index 0 is the vocabulary).
                Console.Write("{0}", inFiles[m + 1]);
                for (int k = 0; k < K; k++)
                {
                    Console.Write("\t{0}", theta[m, k]);
                }
                Console.WriteLine();
            }

            Console.WriteLine("Press any key");
            Console.ReadKey();
        }

        /// <summary>
        /// Reads the vocabulary file: for each non-blank line, takes the second
        /// '&amp;'-separated field, trimmed, as one term.
        /// </summary>
        /// <param name="path">Path of the vocabulary file.</param>
        /// <returns>The terms in file order.</returns>
        /// <exception cref="FormatException">
        /// A non-blank line has fewer than two '&amp;'-separated fields.
        /// </exception>
        private static string[] ReadVocabulary(string path)
        {
            List<string> terms = new List<string>();
            // using guarantees the reader is released even if parsing throws.
            using (TextReader reader = new StreamReader(path))
            {
                int lineNumber = 0;
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    lineNumber++;
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] tokens = line.Split('&');
                    if (tokens.Length < 2)
                    {
                        // Fail loudly rather than skip: dropping a term would shift the
                        // position of every later term in the returned array.
                        throw new FormatException(string.Format(
                            "{0}({1}): expected at least two '&'-separated fields but found \"{2}\".",
                            path, lineNumber, line));
                    }
                    terms.Add(tokens[1].Trim());
                }
            }
            return terms.ToArray();
        }

        /// <summary>
        /// Reads one document file: for each non-blank line, parses the first
        /// '&amp;'-separated field as an integer and stores that value minus one.
        /// </summary>
        /// <param name="path">Path of the document file.</param>
        /// <returns>The parsed values (each decremented by one) in file order.</returns>
        /// <exception cref="FormatException">A first field is not a valid integer.</exception>
        private static int[] ReadDocument(string path)
        {
            List<int> wordIds = new List<int>();
            using (TextReader reader = new StreamReader(path))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (line.Trim().Length == 0)
                    {
                        continue;
                    }

                    string[] tokens = line.Split('&');
                    // Invariant culture: the files are machine-written data, so parsing
                    // must not depend on the user's regional settings.
                    int fileIndex = int.Parse(tokens[0], System.Globalization.CultureInfo.InvariantCulture);
                    wordIds.Add(fileIndex - 1);
                }
            }
            return wordIds.ToArray();
        }
    }
}
